# Make sure pacman is present before it is used to load everything else;
# install it on the fly when require() reports that it is missing.
if (!require("pacman")) {
  install.packages("pacman")
}
Loading required package: pacman
# Load (installing if needed) every package used in this document.
pacman::p_load(
  tidyverse, janitor, colorspace, broom, fs, scales, ggthemes, ggrepel,
  patchwork, ggimage, jpeg, glue, grid, forcats
)

# set theme for ggplot2
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

# set width of code output
options(width = 65)

# set figure parameters for knitr
knitr::opts_chunk$set(
  fig.width = 7, # 7" width
  fig.asp = 0.618, # the golden ratio
  fig.retina = 3, # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300 # higher dpi, sharper image
)
1 - Du Bois challenge.
# 1. Base data
# Read the income table for the Du Bois challenge from the project's data
# folder.
du_bois_income <- read_csv("data/income.csv")
Rows: 7 Columns: 7
── Column specification ─────────────────────────────────────────
Delimiter: ","
chr (1): Class
dbl (6): Average_Income, Rent, Food, Clothes, Tax, Other
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Interpretation of the COVID-19 Vaccine Attitudes Visualization
This visualization is packed with information about how medical and nursing students across the U.S. feel about the COVID-19 vaccine. It’s a grid of facets where each column represents a different statement about the vaccine, rated on a Likert scale (1 = Strongly Agree to 5 = Strongly Disagree), and each row breaks down the responses by demographic factors like age, gender, race, profession, and vaccination status. The points show the mean score for each group, and the error bars stretch from the 10th to 90th percentiles, giving a sense of how much opinions vary. The first row, labeled “All,” shows the overall responses without splitting by any demographic factors.
Overall Observations
Starting with the “All” row, it’s cool to see that most students lean toward positive views about the vaccine. For statements like “I believe the vaccine is safe,” “Getting the vaccine will make me feel safer at work,” and “I will recommend the vaccine to others,” the mean scores hover around 1.5 to 2, which means they’re generally agreeing—either strongly or somewhat. That feels right to me since these are future healthcare pros who probably trust science. But then, for “I am concerned about the safety and side effects of the vaccine,” the mean jumps to about 3 (neutral), and the error bars go from like 1.5 to 4.5. So, even though they trust the vaccine, a lot of them are still worried about side effects, which I get because it was developed so fast.
Example 1: Asian Respondents’ Mixed Feelings
One thing that caught my eye was in the “Race” category with Asian students. For “I believe the vaccine is safe,” the error bars are huge—like, they go from 1 to 5! That means some strongly agree it’s safe, while others totally disagree, which is wild variation. I didn’t expect that much difference within one group. But then, for “I will recommend the vaccine to others,” the mean is around 2, and the error bars are way tighter, maybe 1 to 3. That’s weird to me—if you’re all over the place on safety, I’d think you’d be unsure about recommending it too. Maybe they feel they should promote it, even if they’re not totally sold on it themselves, like it’s their duty or something during a pandemic.
Example 2: Nursing vs. Medical Students
Looking at the “Profession” row, I noticed nursing and medical students don’t line up as much as I thought they would. For stuff like “I trust the information I have received,” nursing students have tight error bars, maybe 1 to 2.5, so they’re pretty consistent. Medical students, though, have wider ones, like 1 to 4, showing more mixed opinions. I figured both groups would think alike since they’re both in healthcare and learning the same science. This makes me wonder if medical students are more skeptical because they’re digging into research more, while nursing students might just go with what they’re taught. It’s not what I expected!
Example 3: Vaccinated Students Still Worried
In the “Had COVID vaccine” row, students who said “Yes” to being vaccinated have means around 1 to 1.5 for positive statements like “I believe the vaccine is safe,” which makes sense—if you got it, you probably trust it. But for “I am concerned about side effects,” the mean is still around 3, with error bars from 1.5 to 4.5. That’s interesting because I thought if you’re vaccinated, you’d be less worried. It shows even people who took it aren’t totally chill about risks, which fits with how some might’ve gotten it for school or work but still have doubts about a new vaccine.
Wrapping Up
This plot shows that medical and nursing students mostly trust the COVID-19 vaccine, but they’re not all-in—side effect worries are real across the board. The differences between groups, like race or profession, add layers I didn’t expect, making it clear that even future doctors and nurses don’t all see it the same way. It’s pretty fascinating how complicated their views are!
3 - COVID survey - reconstruct
# Loading data
# First read the file as-is so its raw layout can be inspected.
covid_check <- read.csv("data/covid-survey.csv")
# head(covid)

# Inspecting the head of the data set shows that the first row only contains
# the text 'likert_survey' and no actual data, so skip it when reading.
covid <- read.csv("data/covid-survey.csv", skip = 1)
dim(covid)
[1] 1121 14
# Passing na = c(".", "") while loading the data would achieve the same
# result; it is done separately here to follow the exercise steps.
covid_filtered_na <- covid |>
  # Trim whitespace and turn empty strings into NA in every column except
  # the respondent id.
  mutate(across(-response_id, ~ na_if(trimws(.), ""))) |>
  # Treat a lone "." as missing as well.
  mutate(across(-response_id, ~ na_if(., "."))) |>
  # Keep only rows that have at least one non-missing value besides the id.
  filter(if_any(-response_id, ~ !is.na(.)))
dim(covid_filtered_na)
[1] 1111 14
# Updating labels: replace the survey codes in each explanatory column with
# human-readable names.
covid_relabeled <- covid_filtered_na |>
  mutate(
    exp_already_vax = recode(exp_already_vax, `0` = "No", `1` = "Yes"),
    exp_flu_vax = recode(exp_flu_vax, `0` = "No", `1` = "Yes"),
    exp_profession = recode(
      exp_profession,
      `0` = "Medical",
      `1` = "Nursing"
    ),
    exp_gender = recode(
      exp_gender,
      `0` = "Male",
      `1` = "Female",
      `3` = "Non-binary third gender",
      `4` = "Prefer not to say"
    ),
    exp_race = recode(
      exp_race,
      `1` = "American Indian / Alaskan Native",
      `2` = "Asian",
      `3` = "Black / African American",
      `4` = "Native Hawaiian / Other Pacific Islander",
      `5` = "White"
    ),
    exp_ethnicity = recode(
      exp_ethnicity,
      `1` = "Hispanic / Latino",
      `2` = "Non-Hispanic / Non-Latino"
    ),
    # Use plain ASCII hyphens in the age ranges. The fct_relevel() call
    # reported in the rendered warnings asks for "26-30" and "21-25"; labels
    # written with an en dash do not match and are reported as unknown levels.
    exp_age_bin = recode(
      exp_age_bin,
      `0` = "<20",
      `20` = "21-25",
      `25` = "26-30",
      `30` = ">30"
    )
  )
dim(covid_relabeled)
[1] 1111 14
covid_survey_longer <- covid_relabeled |>
  # Stack all explanatory (exp_*) columns into two columns: the variable name
  # and its value. This makes them easy to address while plotting.
  pivot_longer(
    cols = starts_with("exp_"),
    names_to = "explanatory",
    values_to = "explanatory_value"
  ) |>
  # Drop rows where the explanatory value is missing.
  filter(!is.na(explanatory_value)) |>
  # Stack all response (resp_*) columns into two columns in the same way,
  # which likewise helps when plotting the data.
  pivot_longer(
    cols = starts_with("resp_"),
    names_to = "response",
    values_to = "response_value"
  )
covid_survey_longer
# A tibble: 19 × 1
explanatory_value
<chr>
1 Nursing
2 Yes
3 Male
4 Asian
5 Non-Hispanic / Non-Latino
6 26–30
7 Female
8 21–25
9 White
10 >30
11 Non-binary third gender
12 Hispanic / Latino
13 Black / African American
14 No
15 American Indian / Alaskan Native
16 Medical
17 <20
18 Prefer not to say
19 Native Hawaiian / Other Pacific Islander
Warning: There were 3 warnings in `mutate()`.
The first warning was:
ℹ In argument: `explanatory_value =
fct_relevel(explanatory_value, ">30", "26-30", "21-25",
"<20")`.
Caused by warning:
! 2 unknown levels in `f`: 26-30 and 21-25
ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining
warnings.
# Set labels
# Replace the raw column names with the facet titles shown in the plot, and
# fix the left-to-right order of the response statements.
covid_survey_summary_stats <- covid_survey_summary_stats |>
  mutate(
    explanatory = recode(
      explanatory,
      "exp_age_bin" = "Age",
      "exp_gender" = "Gender",
      "exp_race" = "Race",
      "exp_ethnicity" = "Ethnicity",
      "exp_profession" = "Profession",
      "exp_already_vax" = "Had COVID vaccine",
      "exp_flu_vax" = "Had flu vaccine this year"
    ),
    # Order the levels first; the recode below renames them in place.
    response = fct_relevel(
      response,
      "resp_safety",
      "resp_feel_safe_at_work",
      "resp_concern_safety",
      "resp_confidence_science",
      "resp_trust_info",
      "resp_will_recommend"
    ),
    response = recode(
      response,
      "resp_safety" =
        "Based on my understanding, I believe the vaccine is safe",
      "resp_confidence_science" =
        "I am confident in the scientific vetting process for the new COVID vaccines",
      "resp_feel_safe_at_work" =
        "Getting the vaccine will make me feel safer at work",
      "resp_will_recommend" =
        "I will recommend the vaccine to family, friends, and community members",
      "resp_trust_info" =
        "I trust the information that I have received about the vaccines",
      "resp_concern_safety" =
        "I am concerned about the safety and side effects of the vaccine"
    )
  )

# Creating the plot
# One point per group at its mean score, with a horizontal bar from `low` to
# `high`; rows are explanatory variables, columns are response statements.
covid_survey_summary_stats |>
  ggplot(aes(x = mean, y = factor(explanatory_value))) +
  geom_point(size = 0.75) +
  geom_errorbarh(aes(xmin = low, xmax = high), height = 0.3) +
  facet_grid(
    rows = vars(explanatory),
    cols = vars(response),
    scales = "free_y",
    space = "free_y",
    labeller = labeller(
      explanatory = label_wrap_gen(15),
      response = label_wrap_gen(15)
    )
  ) +
  scale_x_continuous(breaks = 1:5) +
  labs(
    x = "Mean Likert score\n(Error bars range from 10th to 90th percentile)",
    y = NULL
  ) +
  theme(
    strip.text = element_text(size = 6),
    strip.text.y = element_text(angle = 0),
    axis.text.y = element_text(size = 6),
    axis.text.x = element_text(size = 6),
    panel.spacing = unit(0, "lines"),
    panel.spacing.x = unit(0.3, "lines"),
    axis.title.x = element_text(size = 8),
    panel.grid = element_blank(),
    strip.background = element_rect(fill = "gray90", color = "black")
  )
Warning: There were 3 warnings in `mutate()`.
The first warning was:
ℹ In argument: `explanatory_value =
fct_relevel(explanatory_value, ">30", "26-30", "21-25",
"<20")`.
Caused by warning:
! 2 unknown levels in `f`: 26-30 and 21-25
ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining
warnings.
# Set labels
# Replace the raw column names with the facet titles shown in the plot, and
# fix the left-to-right order of the response statements.
covid_survey_summary_stats_2 <- covid_survey_summary_stats_2 |>
  mutate(
    explanatory = recode(
      explanatory,
      "exp_age_bin" = "Age",
      "exp_gender" = "Gender",
      "exp_race" = "Race",
      "exp_ethnicity" = "Ethnicity",
      "exp_profession" = "Profession",
      "exp_already_vax" = "Had COVID vaccine",
      "exp_flu_vax" = "Had flu vaccine this year"
    ),
    # Order the levels first; the recode below renames them in place.
    response = fct_relevel(
      response,
      "resp_safety",
      "resp_feel_safe_at_work",
      "resp_concern_safety",
      "resp_confidence_science",
      "resp_trust_info",
      "resp_will_recommend"
    ),
    response = recode(
      response,
      "resp_safety" =
        "Based on my understanding, I believe the vaccine is safe",
      "resp_confidence_science" =
        "I am confident in the scientific vetting process for the new COVID vaccines",
      "resp_feel_safe_at_work" =
        "Getting the vaccine will make me feel safer at work",
      "resp_will_recommend" =
        "I will recommend the vaccine to family, friends, and community members",
      "resp_trust_info" =
        "I trust the information that I have received about the vaccines",
      "resp_concern_safety" =
        "I am concerned about the safety and side effects of the vaccine"
    )
  )

# Creating the plot
# Same layout as the 10th-90th percentile figure; the axis title states that
# the bars here run from the 25th to the 75th percentile.
covid_survey_summary_stats_2 |>
  ggplot(aes(x = mean, y = factor(explanatory_value))) +
  geom_point(size = 0.75) +
  geom_errorbarh(aes(xmin = low, xmax = high), height = 0.3) +
  facet_grid(
    rows = vars(explanatory),
    cols = vars(response),
    scales = "free_y",
    space = "free_y",
    labeller = labeller(
      explanatory = label_wrap_gen(15),
      response = label_wrap_gen(15)
    )
  ) +
  scale_x_continuous(breaks = 1:5) +
  labs(
    x = "Mean Likert score\n(Error bars range from 25th to 75th percentile)",
    y = NULL
  ) +
  theme(
    strip.text = element_text(size = 6),
    strip.text.y = element_text(angle = 0),
    axis.text.y = element_text(size = 6),
    axis.text.x = element_text(size = 6),
    panel.spacing = unit(0, "lines"),
    panel.spacing.x = unit(0.3, "lines"),
    axis.title.x = element_text(size = 8),
    panel.grid = element_blank(),
    strip.background = element_rect(fill = "gray90", color = "black")
  )